package org.longteng.mobileluck.orc;

import net.sourceforge.tess4j.TesseractException;
import net.sourceforge.tess4j.util.LoadLibs;
import org.springframework.beans.factory.annotation.Value;
import lombok.extern.slf4j.Slf4j;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;

import java.io.File;

/**
 * Extracts text from images using tess4j (Tesseract OCR).
 *
 * <p>The tessdata directory comes from the {@code app.orc.tessdata-path} property placeholder,
 * whose default is {@code tessdata}; the field initializer supplies the same value.
 * NOTE(review): no Spring stereotype annotation is visible on this class, so the {@code @Value}
 * injection presumably depends on the class being registered as a bean elsewhere - confirm.
 *
 * <p>NOTE(review): a single engine instance is shared and reconfigured on every call; confirm
 * that callers do not invoke it concurrently.
 */
@Slf4j
public class ImgORGTesseract {
    /** Language code handed to Tesseract; this class only performs English recognition. */
    private static final String OCR_LANGUAGE = "eng";

    /** The OCR engine; created once per object and reused for each recognition call. */
    private final ITesseract instance = new Tesseract();

    @Value("${app.orc.tessdata-path:tessdata}")
    private String tessdataPath = "tessdata";

    /**
     * Reads the English text contained in an image file.
     *
     * @param imgPath path of the image file to recognize; may be {@code null}
     * @return the recognized text, or {@code null} when {@code imgPath} is {@code null}, does not
     *         denote an existing regular file, or the OCR engine reports a failure
     */
    public String getImgEngContent(String imgPath) {
        log.debug("转换图片：{}\n  tessdata训练数据目录:{}, 识别的语种:{}", imgPath, this.tessdataPath, OCR_LANGUAGE);

        // new File(null) would throw NullPointerException; report it like any other failure instead.
        if (imgPath == null) {
            log.warn("图片路径为空，无法识别。");
            return null;
        }

        File file = new File(imgPath);
        // Reject missing paths and directories up front so the log names the real cause
        // rather than a wrapped engine exception.
        if (!file.isFile()) {
            log.warn("图片文件不存在或不是文件：{}", file.getAbsolutePath());
            return null;
        }

        // Alternative kept for reference: LoadLibs.extractTessResources(this.tessdataPath) unpacks
        // the bundled tessdata into the system temp directory (e.g. ...\Temp\tess4j\tessdata).
        File tessDataFolder = new File(this.tessdataPath);
        log.debug("tessdataPath:{}", tessDataFolder.getAbsolutePath());

        // Configured on every call because tessdataPath may be injected after construction.
        instance.setDatapath(tessDataFolder.getAbsolutePath());
        instance.setLanguage(OCR_LANGUAGE);  // e.g. "chi_sim" would select Simplified Chinese

        try {
            String result = instance.doOCR(file);
            log.debug("读取图片‘{}’ \n->数据为：{}", imgPath, result);
            return result;
        } catch (TesseractException e) {
            log.error("读取图片数据异常。", e);
            return null;
        }
    }
}
